Pacotes necessários

library(tidyr)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(corrplot)
## corrplot 0.92 loaded
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(maps)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths

Lendo o arquivo

# Load the arabica coffee dataset; the path is relative to the working
# directory.
df_cafe <- read.csv(file = "df_arabica_clean.csv")

Selecionamos as variáveis numéricas relevantes

# Numeric variables used in the rest of the analysis. Columns are selected by
# name rather than by position so the selection does not silently pick the
# wrong columns if the CSV layout changes.
# NOTE(review): these names are the df_cafe columns plotted in the histograms
# below and are assumed to sit at positions 21:26, 30, 32:35 and 37 -- confirm.
var_num_cafe <- df_cafe[, c(
  "Aroma", "Flavor", "Aftertaste", "Acidity", "Body", "Balance",
  "Overall", "Total.Cup.Points", "Moisture.Percentage",
  "Category.One.Defects", "Quakers", "Category.Two.Defects"
)]

# Portuguese display labels, in the same order as the columns selected above.
names(var_num_cafe) <- c(
  "Aroma", "Sabor", "Retrogosto", "Acidez", "Corpo", "Harmonia",
  "Conceito Final", "Resultado final", "Porcentagem de umidade",
  "Defeitos tipo I", "Quakers", "Defeito tipo II"
)

Histogramas das variáveis numéricas (características sensoriais, umidade e defeitos):

# Build an interactive histogram of one column of a data frame.
#
# Arguments:
#   dados  - data frame holding the column to plot.
#   coluna - name of the column, as a string.
#   titulo - plot title.
#   cor    - colour used for the bars, the title and the axis titles.
#   bins   - number of histogram bins (default 40).
#
# Returns the plotly widget produced by ggplotly(); the hover tooltip shows
# only the bin count.
plot_histograma <- function(dados, coluna, titulo, cor, bins = 40) {
  grafico <- ggplot(
    dados,
    aes(
      x = .data[[coluna]],
      # after_stat() replaces the `..count..` notation deprecated in
      # ggplot2 3.4.0.
      y = after_stat(count),
      text = paste("Contagem:", after_stat(count))
    )
  ) +
    geom_histogram(bins = bins, fill = cor) +
    labs(x = "Caracteristica Sensorial", y = "Frequencia", title = titulo) +
    theme(
      plot.title = element_text(size = 14, color = cor, face = "bold"),
      axis.title.x = element_text(face = "bold", color = cor),
      axis.title.y = element_text(face = "bold", color = cor),
      axis.text.x = element_text(face = "bold")
    )

  ggplotly(grafico, tooltip = "text")
}

# Score variables (40 bins each).
plot_histograma(df_cafe, "Aroma", "Histograma do aroma", "#3A1F05")
plot_histograma(df_cafe, "Flavor", "Histograma do sabor", "#572F07")
plot_histograma(df_cafe, "Aftertaste", "Histograma do retrogosto", "#6E3B08")
plot_histograma(df_cafe, "Acidity", "Histograma da acidez", "#81460A")
plot_histograma(df_cafe, "Body", "Histograma do corpo", "#A5590D")
plot_histograma(df_cafe, "Balance", "Histograma da harmonia", "#BD660F")
plot_histograma(df_cafe, "Overall", "Histograma do conceito final", "#D5710D")
plot_histograma(df_cafe, "Total.Cup.Points",
                "Histograma do resultado final", "#353A35")
plot_histograma(df_cafe, "Moisture.Percentage",
                "Histograma da porcentagem de umidade", "#006140")

# Defect and quaker counts use 12 bins instead of 40.
plot_histograma(df_cafe, "Category.One.Defects",
                "Histograma da quantidade de defeitos tipo I", "#006600",
                bins = 12)
plot_histograma(df_cafe, "Quakers",
                "Histograma da quantidade de quakers", "#054905",
                bins = 12)
plot_histograma(df_cafe, "Category.Two.Defects",
                "Histograma da quantidade de defeitos tipo II", "#032403",
                bins = 12)

Criando a matriz de correlações

# Pairwise correlations between the numeric variables, rounded to two
# decimal places.
corr_mat <- var_num_cafe %>%
  cor() %>%
  round(digits = 2)

Convertendo a matriz para o formato longo (uma linha por par de variáveis)

# Reshape the correlation matrix into long format: one row per
# (Var1, Var2) pair with the correlation in `value`.
melted_corr_mat <- reshape2::melt(corr_mat)

# Show the first rows of the long table.
head(melted_corr_mat, n = 6L)
##         Var1  Var2 value
## 1      Aroma Aroma  1.00
## 2      Sabor Aroma  0.82
## 3 Retrogosto Aroma  0.79
## 4     Acidez Aroma  0.71
## 5      Corpo Aroma  0.63
## 6   Harmonia Aroma  0.75

Plotando o gráfico heatmap de correlações

# Correlation heatmap: one tile per pair of variables, coloured on a
# red (-1) / white (0) / blue (+1) scale, with the rounded correlation
# printed on each tile.
ggplot(data = melted_corr_mat, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "red", high = "blue", mid = "white", midpoint = 0,
    # Spelled out in full: `limit` only worked through partial argument
    # matching.
    limits = c(-1, 1), space = "Lab", name = "Correlation"
  ) +
  # The label layer inherits x and y from the plot so each number is drawn on
  # the tile it belongs to (the previous code swapped the two axes, which
  # only worked because the correlation matrix is symmetric).
  geom_text(aes(label = value), color = "black", size = 4) +
  labs(title = "Correlacao entre as variaveis numericas") +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    legend.title = element_text(face = "bold"),
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold")
  )

Podemos observar algumas estatísticas interessantes das variáveis (média e variância)

# Mean of each numeric variable. colMeans() works on the data frame directly,
# avoiding the matrix coercion that apply() performs.
colMeans(var_num_cafe)
##                  Aroma                  Sabor             Retrogosto 
##              7.7210628              7.7447343              7.5997585 
##                 Acidez                  Corpo               Harmonia 
##              7.6902899              7.6409179              7.6440580 
##         Conceito Final        Resultado final Porcentagem de umidade 
##              7.6768116             83.7065700             10.7352657 
##        Defeitos tipo I                Quakers        Defeito tipo II 
##              0.1352657              0.6908213              2.2512077
# Sample variance of each numeric variable, computed column by column;
# vapply() guarantees a named numeric vector and avoids the matrix coercion
# that apply() performs on a data frame.
vapply(var_num_cafe, var, numeric(1))
##                  Aroma                  Sabor             Retrogosto 
##             0.08272896             0.07818330             0.07612664 
##                 Acidez                  Corpo               Harmonia 
##             0.06734555             0.05452197             0.06568928 
##         Conceito Final        Resultado final Porcentagem de umidade 
##             0.09385580             2.99434303             1.55617748 
##        Defeitos tipo I                Quakers        Defeito tipo II 
##             0.35054641             2.84569204             8.70357863

Calculando a média dos valores sensoriais por país

# Per-country mean of each score column. The result has one row per
# Country.of.Origin and one `media_<column>` column per averaged variable.
df_medias <- df_cafe %>%
  group_by(Country.of.Origin) %>%
  summarise(
    across(
      all_of(c(
        "Aroma", "Flavor", "Aftertaste", "Acidity", "Body", "Balance",
        "Overall", "Total.Cup.Points", "Moisture.Percentage"
      )),
      mean,
      .names = "media_{.col}"
    )
  )

Corrigindo o nome de alguns países

# Rename countries by matching on the name itself instead of on hard-coded
# row positions (18 and 21), which silently break whenever the set of
# countries in the data changes.
# NOTE(review): the patterns assume the raw names start with "Tanzania" and
# "United States" -- confirm against the dataset.
paises <- df_medias$Country.of.Origin
paises[grepl("^Tanzania", paises)] <- "Tanzania"
paises[grepl("^United States", paises)] <- "USA"
df_medias$Country.of.Origin <- paises

# The country column is renamed to "region", the key used later to join with
# the world map data.
names(df_medias)[1] <- "region"

Gráfico de barras do desempenho em cada país

# Interactive bar chart of the mean total cup points per country, with bars
# sorted from highest to lowest mean. The y axis is zoomed to 70-90 and the
# tooltip shows the country and its mean.
(
  ggplot(
    df_medias,
    aes(text = paste("Pais:", region,
                     "<br>Pontos totais: ", media_Total.Cup.Points))
  ) +
    geom_col(
      aes(
        x = reorder(region, -media_Total.Cup.Points),
        y = media_Total.Cup.Points
      ),
      fill = "#753D06"
    ) +
    scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
    coord_cartesian(ylim = c(70, 90)) +
    labs(
      x = "Paises",
      y = "Valor da media",
      title = "Media do resultado final de cada pais em grafico de barras"
    ) +
    theme(
      plot.title = element_text(size = 14, color = "#120900", face = "bold"),
      axis.title.x = element_text(face = "bold"),
      axis.title.y = element_text(face = "bold"),
      # Vertical, right-aligned and bold country labels.
      axis.text.x = element_text(angle = 90, hjust = 1, face = "bold")
    )
) %>%
  ggplotly(tooltip = "text")

Criando o gráfico de mapa

# World polygon data with the per-country means attached; regions that have
# no match in df_medias get NA in the mean columns.
world_map <- map_data("world") %>%
  left_join(df_medias, by = "region")

# Interactive world map where each country is filled according to its mean
# total cup points; countries without a value are drawn in grey.
(
  ggplot(
    world_map,
    aes(
      x = long,
      y = lat,
      group = group,
      text = paste("Pais:", region,
                   "<br>Pontos totais: ", media_Total.Cup.Points)
    )
  ) +
    geom_polygon(aes(fill = media_Total.Cup.Points), color = "black") +
    scale_fill_gradient(
      name = "Valor da media",
      low = "#DE9B58",
      high = "#472401",
      na.value = "grey50"
    ) +
    labs(
      x = "Longitude",
      y = "Latitude",
      title = "Media do resultado final de cada pais em mapa"
    ) +
    theme(
      legend.title = element_text(color = "#472401", face = "bold"),
      plot.title = element_text(size = 14, color = "#120900", face = "bold"),
      axis.title.x = element_text(face = "bold"),
      axis.title.y = element_text(face = "bold")
    )
) %>%
  ggplotly(tooltip = "text")